#delimit;
** Every statement below, comments included, ends with a semicolon because of the delimiter setting above;
** REPLACE FILE PATH WITH PATH TO RELEVANT REPLICATION FILES;
** The log path below is wrapped in double quotes so that a location containing spaces also works;
local fileloc = "~/KMS_REPLICATION";
set logtype text;
** Close any log still open under this name from an earlier run - capture suppresses the error when none is open;
capture log close zip_level_pollution;

** Start a fresh named text log, overwriting any previous copy;
log using "`fileloc'/log_files/zip_level_pollution.txt", name(zip_level_pollution) replace;
set more off;
clear all;
pause on;


**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**XXXXXXXXXXXXXXXXXXXX BUILDING ZIP CODE POLLUTION VALUES XXXXXXXXXXXXXXXXX
**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;

** Build weekly zip-level pollution as a distance-weighted average of nearby sensors;
** File paths are quoted so that a replication folder whose name contains spaces still works;

** Restrict to sensors within 20 miles of a zip code (see text);
use "`fileloc'/data/location_data/pollution_to_zip_distance.dta", clear;

** Missing distances compare as larger than any number in Stata, so they are dropped here too;
drop if distance > 20;

** NOTE(review): 1/distance is missing when distance is exactly 0, and rows with a missing weight;
** appear to be left out of the weighted collapse below - confirm whether such pairs exist and how they should be treated;
** The check only writes a line to the log and does not change the data;
quietly count if distance == 0;
if r(N) > 0 {;
	display as text "NOTE: " r(N) " sensor-zip pairs have distance == 0 and get a missing inverse-distance weight";
};

** Pair every remaining sensor-zip row with that sensor's weekly readings, matching on location;
joinby location using "`fileloc'/data/emissions_data/weekly_all_pollutants_KMS";

** Weight by inverse of distance to zip code centroid;
gen weight = 1/distance;

** One row per zip and week: weighted means of year and the three pollutants, plus the distance to the closest sensor;
collapse year weekly_co weekly_oz weekly_pm10 (min) min_dist = distance [w = weight], by(mother_zip week) fast;

** Keep only zip-week rows where all three pollutant averages are non-missing;
** Note: current version does not include ozone, but we keep here for consistency with NBER version;
drop if weekly_pm10 == . | weekly_co == . | weekly_oz == .;

** How many zips?;
** gsort numbers the mother_zip groups 1, 2, 3 and so on in howmany, so the max reported by sum is the zip count;
gsort mother_zip, g(howmany);
sum howmany;
drop howmany;

tab year;

save "`fileloc'/data/emissions_data/20mile_pollution_KMS.dta", replace;
		

** Trimester exposure approximation;
** Reads the zip-week pollution file, builds three 12-row trailing averages per pollutant, and saves one row per zip and birth week;
use "`fileloc'/data/emissions_data/20mile_pollution_KMS.dta", clear;

** CREATE MOVING AVERAGE FOR 1ST, 2ND, AND 3RD TRIMESTER EXPOSURE;

** Note: third trimester done starting with 26th week - this way everyone has at least one week of third trimester exposure;

** Windows are counted backwards from the current row, which is later renamed to birth_week:
   tri3 averages lags 1 to 12, tri2 averages lags 13 to 24, tri1 averages lags 25 to 36;

** NOTE(review): the [_n-k] subscripts step back k rows within a zip, which equals k weeks only when every zip
   has one row for each consecutive week. Rows with a missing pollutant were dropped when the input file was built,
   so gaps may exist - confirm, or switch to time-series lag operators after tsset;

sort mother_zip week;

foreach pollutant in weekly_co weekly_pm10 weekly_oz {;

	quietly {;

		** Loop over trimesters in the order 3, 2, 1 so the variables are created in the same order as before;
		foreach t in 3 2 1 {;

			** First lag of the window is 1 for tri3, 13 for tri2 and 25 for tri1, and each window spans 12 rows;
			local first_lag = (3 - `t')*12 + 1;
			local next_lag  = `first_lag' + 1;
			local last_lag  = `first_lag' + 11;

			** Start the running sum with the first lag. A subscript before the first row of a zip yields missing,
			   which propagates through the sum and flags rows without a full window;
			by mother_zip : gen tri`t'`pollutant' = `pollutant'[_n-`first_lag'];

			forvalues weekback = `next_lag'/`last_lag' {;
				by mother_zip : replace tri`t'`pollutant' = tri`t'`pollutant' + `pollutant'[_n-`weekback'];
			};

			** Turn the 12-row sum into an average;
			replace tri`t'`pollutant' = tri`t'`pollutant'/12;

		};

	};

};

keep mother_zip week tri*;
rename week birth_week;
sort mother_zip birth_week;

** Keep only those with trimester information for every pollutant;
foreach pollutant in co oz pm10 {;
	drop if tri1weekly_`pollutant' == . ;
	drop if tri2weekly_`pollutant' == . ;
	drop if tri3weekly_`pollutant' == . ;
};

save "`fileloc'/data/emissions_data/20mile_trimesters_KMS.dta", replace;

log close zip_level_pollution;
